###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file produces in-party shared affinity choice results for Figure 3
########################################################
# inputs :: /data/data_matrix_scored.Rdata;
#				 :: /data/cces_stacked_unmatched.Rdata

# outputs ::
# -- Figure 3: 'Impact of Shared Attributes and Policy Agreement on Primary Vote Support'
# => main/figures/choice_primary_twoparty_affinity_separated_dd.pdf

# -- Supplementary Figures :: none

#write.csv(
#	list('n.rep'=nrow(reps),
# 		'n.rep.cluster'=length(unique(reps$respondent)),
# 		'n.dem'=nrow(dems),
# 		'n.dem.cluster'=length(unique(dems$respondent))
#	),
#	file='main/figures/choice_primary_twoparty_affinity_separated_dd.csv'
#)

#dirs="~/Dropbox/replication0/"
#dirs should be set here or in runR.R 

rm(list=ls()[which(ls()!='dirs')])

library(ggplot2)
library(stringr)

# messy function to reorder by some estimate value
lableOrder=function(xmat,labels,label.groups,omits,o.column){

	# denote which label is to be omitted on the label
	for(i in 1:length(omits)){
		labels[which(labels==omits[i])]=paste('omit',labels[which(labels==omits[i])],sep='_')
	}

	# break groups into levels
	un_group=unique(label.groups)

	# this is the item to sort on, typically global or independent
	xm=xmat[,o.column]

	# vector which will contain row order
	xo=1:length(xm)

	# rearranging roworder within level
	for(j in 1:length(un_group)){
		ix=which(label.groups==un_group[j])
		if(length(ix)>2){
			ix=ix[!grepl(labels[ix],pattern='omit')]
			xo[ix]=xo[ix][order(xm[ix])]
		}
	}
	return(xmat[xo,])
}

reOrder=function(x,o){
	ix=array(NA,nrow(x))
	for(i in 1:length(o)){
		ix[i]=which(x$iv_order==o[i])
	}
	return(x[ix,])
}

setwd(dirs)
load("data/cces_stacked_unmatched.Rdata")

###########################################
###First, need to stack based on candidates, not just candidate pairs (and also get text out for labels later)

#This has leaners as independents, which is incorrect
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)

load("data/data_matrix_scored.Rdata")
candidate_matrix=data_matrix
#candidate_matrix=read.csv("csv/data_matrix_scored.csv",header=T,stringsAsFactors=F)[,-c(1)]

###########################################
###Then, produce models w/ Clustered SEs

#Function from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
clse.f <- function(dat,fm, cluster){
 require(sandwich)
 require(lmtest)
 not <- attr(fm$model,"na.action")
if( ! is.null(not)){
  cluster <- cluster[-not]
    dat <- dat[-not,]
}
 with(dat,{
 M <- length(unique(cluster))
 N <- length(cluster)
 K <- fm$rank
 dfc <- (M/(M-1))*((N-1)/(N-K))
 uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
 vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
 coeftest(fm, vcovCL)
 }
 )
}

############################################################
############################################################
### Primary Elections (by PID)

####

#candidate_matrix$tr=as.numeric(candidate_matrix$cq==1)
#candidate_matrix$tr[which(candidate_matrix$cq==4)]=NA

#candidate_matrix$ii_scores = candidate_matrix$scores*candidate_matrix$self_place

# include factional endorsement affinity == ff is non-policy identity affinity for groups
# include factional endorsement affinity == ee is policy identity affinity for groups
candidate_matrix$older_white = as.numeric(candidate_matrix$age>=45 & candidate_matrix$white==1)
kk=which(names(candidate_matrix)=='religion')[2]
candidate_matrix$christian = as.numeric(candidate_matrix[,kk]=='Protestant' | candidate_matrix[,kk]=='Roman Catholic' | candidate_matrix[,kk]=='Mormon')
candidate_matrix$militaryhh = as.numeric(candidate_matrix$"milstat_1"=='Yes' | candidate_matrix$"milstat_2"=='Yes' | candidate_matrix$"milstat_3"=='Yes' ) # might be too broad since it includes family
candidate_matrix$female_single = as.numeric(candidate_matrix$female==1 & candidate_matrix$married == 0)
candidate_matrix$black  = candidate_matrix$black
candidate_matrix$hisps = candidate_matrix$hisps
candidate_matrix$unionhh = candidate_matrix$unionhh
candidate_matrix$investor = candidate_matrix$investor

candidate_matrix$ff_reproductive = as.numeric(candidate_matrix$e_reproductive == 1 & candidate_matrix$female_single == 1)
candidate_matrix$ff_civilrights  = as.numeric(candidate_matrix$e_civilrights  == 1 & (candidate_matrix$black == 1 | candidate_matrix$hisps == 1))
candidate_matrix$ff_laborunions  = as.numeric(candidate_matrix$e_laborunions  == 1 & candidate_matrix$unionhh == 1)
candidate_matrix$ff_veterans     = as.numeric(candidate_matrix$e_veterans     == 1 & candidate_matrix$militaryhh == 1)
candidate_matrix$ff_business     = as.numeric(candidate_matrix$e_business     == 1 & candidate_matrix$investor == 1)
candidate_matrix$ff_taxreform    = as.numeric(candidate_matrix$e_taxreform    == 1 & candidate_matrix$investor == 1)
candidate_matrix$ff_christian    = as.numeric(candidate_matrix$e_christian    == 1 & candidate_matrix$christian == 1)
candidate_matrix$ff_teaparty     = as.numeric(candidate_matrix$e_teaparty     == 1 & candidate_matrix$older_white == 1)

candidate_matrix$ff_energy       = as.numeric(candidate_matrix$e_energy      == 1 & candidate_matrix$mip_env == 1)
candidate_matrix$ff_environment  = as.numeric(candidate_matrix$e_environment == 1 & candidate_matrix$mip_env  == 1)
candidate_matrix$ff_guncontrol   = as.numeric(candidate_matrix$e_guncontrol  == 1 & candidate_matrix$mip_guns == 1)
candidate_matrix$ff_gunrights    = as.numeric(candidate_matrix$e_gunrights   == 1 & candidate_matrix$mip_guns == 1)


vote <- subset(candidate_matrix,candidate_matrix$conjoints==3|candidate_matrix$conjoints==8)
vote$pty <- ifelse(vote$conjoints==3,0,1)

reps <- subset(vote, vote$pid3==1)
dems <- subset(vote, vote$pid3==-1)
inds <- subset(vote, vote$pid3==0)


#dv_choice is the More Similar variable
# pty is included for independents here, otherwise of course NAs out.

attach(reps)
primary_elec_reps <- lm(dv_choice~
pty +
ii_male + ii_female+
ii_white+ii_black+ii_hispanic+
ii_r_none+ii_r_protestant+ii_r_catholic+ii_r_evangelical+
ee_reproductive+ee_civilrights+ee_environment+ee_guncontrol+ee_laborunions+
ee_veterans+ee_business+ee_taxreform+ee_energy+ee_gunrights+ee_christian+ee_teaparty+
#ff_reproductive+ff_civilrights+ff_environment+ff_guncontrol+ff_laborunions+
#ff_veterans+ff_business+ff_taxreform+ff_energy+ff_gunrights+ff_christian+ff_teaparty+
ii_freetrade+ii_lgbt+ii_need+ii_citizenship+ii_righttochoose+ii_raisetaxes+ii_co2emissions+ii_policing+ii_reducemilitary+ii_guncontrol+
ii_unfairtrade+ii_govabuse+ii_criminals+ii_drilling+ii_cuttaxes+ii_bordersecurity+ii_strengthenmilitary+ii_gunrights+ii_unbornlives+ii_marriage
#ii_scores
,weights=1/wt)

primary_elec_reps_clse <- clse.f(reps,primary_elec_reps,respondent)
detach(reps)


attach(dems)
primary_elec_dems <- lm(dv_choice~
pty +
ii_male + ii_female+
ii_white+ii_black+ii_hispanic+
ii_r_none+ii_r_protestant+ii_r_catholic+ii_r_evangelical+
ee_reproductive+ee_civilrights+ee_environment+ee_guncontrol+ee_laborunions+
ee_veterans+ee_business+ee_taxreform+ee_energy+ee_gunrights+ee_christian+ee_teaparty+
#ff_reproductive+ff_civilrights+ff_environment+ff_guncontrol+ff_laborunions+
#ff_veterans+ff_business+ff_taxreform+ff_energy+ff_gunrights+ff_christian+ff_teaparty+
ii_freetrade+ii_lgbt+ii_need+ii_citizenship+ii_righttochoose+ii_raisetaxes+ii_co2emissions+ii_policing+ii_reducemilitary+ii_guncontrol+
ii_unfairtrade+ii_govabuse+ii_criminals+ii_drilling+ii_cuttaxes+ii_bordersecurity+ii_strengthenmilitary+ii_gunrights+ii_unbornlives+ii_marriage
#ii_scores
,weights=1/wt)

primary_elec_dems_clse <- clse.f(dems,primary_elec_dems,respondent)
detach(dems)

attach(inds)
primary_elec_inds <- lm(dv_choice~
pty +
ii_male + ii_female+
ii_white+ii_black+ii_hispanic+
ii_r_none+ii_r_protestant+ii_r_catholic+ii_r_evangelical+
ee_reproductive+ee_civilrights+ee_environment+ee_guncontrol+ee_laborunions+
ee_veterans+ee_business+ee_taxreform+ee_energy+ee_gunrights+ee_christian+ee_teaparty+
#ff_reproductive+ff_civilrights+ff_environment+ff_guncontrol+ff_laborunions+
#ff_veterans+ff_business+ff_taxreform+ff_energy+ff_gunrights+ff_christian+ff_teaparty+
ii_freetrade+ii_lgbt+ii_need+ii_citizenship+ii_righttochoose+ii_raisetaxes+ii_co2emissions+ii_policing+ii_reducemilitary+ii_guncontrol+
ii_unfairtrade+ii_govabuse+ii_criminals+ii_drilling+ii_cuttaxes+ii_bordersecurity+ii_strengthenmilitary+ii_gunrights+ii_unbornlives+ii_marriage
#ii_scores
,weights=1/wt)

primary_elec_inds_clse <- clse.f(inds,primary_elec_inds,respondent)
detach(inds)


#Now mashing it all together for ggplot

results_matrix <- cbind(primary_elec_dems$coefficients,primary_elec_inds$coefficients,primary_elec_reps$coefficients)
# remove pty coef
results_matrix=results_matrix[-c(2),]
results_matrix=cbind(results_matrix,NA,NA,NA)

for(j in 1:nrow(results_matrix)){
	ix=which(names(primary_elec_dems_clse[,2])==rownames(results_matrix)[j])
	if(length(ix)>0){
		results_matrix[j,4]=primary_elec_dems_clse[ix,2]
	}

	ix=which(names(primary_elec_inds_clse[,2])==rownames(results_matrix)[j])
	if(length(ix)>0){
		results_matrix[j,5]=primary_elec_inds_clse[ix,2]
	}

	ix=which(names(primary_elec_reps_clse[,2])==rownames(results_matrix)[j])
	if(length(ix)>0){
		results_matrix[j,6]=primary_elec_reps_clse[ix,2]
	}
}


results_matrix <- results_matrix[,c(1,4,2,5,3,6)]

colnames(results_matrix) <- c("d_estimate","d_se","i_estimate","i_se","r_estimate","r_se")
results_matrix <- results_matrix[2:nrow(results_matrix),]
results_matrix_withnames <- cbind(var=rownames(results_matrix),results_matrix)
rownames(results_matrix_withnames) <- seq(1,nrow(results_matrix),by=1)

#results_matrix_withnames[is.na(results_matrix_withnames)]<- 0
##Now inserting rows for the omitted levels

#omitted: male, white, no religion, attorney, decent, newspaper endorsements, record = helping constituents, raising taxes for both issues

#g_male <- c("g_male",rep(0,6))
#re_white <- c("re_white",rep(0,6))
#r_none <- c("r_none",rep(0,6))
#o_attorney <- c("o_attorney",rep(0,6))
#p_decent <- c("p_decent",rep(0,6))
#e_newspapers <- c("e_newspapers",rep(0,6))
#rec_help <- c("rec_help",rep(0,6))
#i_raisetaxes <- c("i1_raisetaxes",rep(0,6))
#i_freetrade <- c("i1_freetrade",rep(0,6))

#full_matrix <- rbind(
#g_male,
#results_matrix_withnames[1,],
#re_white,
#results_matrix_withnames[2:3,],
#r_none,
#results_matrix_withnames[4:6,],
#o_attorney,
#results_matrix_withnames[7:15,],
#p_decent,
#results_matrix_withnames[16:22,],
#e_newspapers,
#results_matrix_withnames[23:34,],
#rec_help,
#results_matrix_withnames[35:38,],
#i_freetrade,
#results_matrix_withnames[39:57,]
#)
full_matrix=results_matrix_withnames
rownames(full_matrix) <- full_matrix[,1]
full_matrix <- full_matrix[,2:ncol(full_matrix)]
full_matrix_clean <- apply(full_matrix,2,as.numeric)
rownames(full_matrix_clean) <- rownames(full_matrix)

labels <- c(
"Gender - Male",
"Gender - Female",
"Race - White",
"Race - Black",
"Race - Hispanic",
"Religion - None",
"Religion - Protestant",
"Religion - Catholic",
"Religion - Evangelical Protestant",
#"Occupation - Attorney",
#"Occupation - CEO",
#"Occupation - City Council Member",
#"Occupation - Factory Foreman",
#"Occupation - Farmer",
#"Occupation - Former US Army Major",
#"Occupation - Political Staffer",
#"Occupation - Small Business Owner",
#"Occupation - State Legislator",
#"Occupation - Teacher",
#"Personality - Decent",
#"Personality - Compassionate",
#"Personality - Empathetic",
#"Personality - Inspiring",
#"Personality - Intelligent",
#"Personality - Knowledgeable",
#"Personality - Moral",
#"Personality - Strong Leader",
#"Endorsements - Major area newspapers",
#"Endorsements - Reproductive rights groups",
#"Endorsements - Civil rights groups",
#"Endorsements - Environmental groups",
#"Endorsements - Gun control groups",
#"Endorsements - Labor unions",
#"Endorsements - Veterans groups",
#"Endorsements - Business groups",
#"Endorsements - Tax reform groups",
#"Endorsements - Energy groups",
#"Endorsements - Gun rights groups",
#"Endorsements - Christian groups",
#"Endorsements - Tea Party groups",
#"Record - Help my constituents get the benefits they deserve",
#"Record - Refuse to compromise my principles even when it means taking on my party",
#"Record - Secure appointment to a powerful legislative committee",
#"Record - Stand with my party to do what's right",
#"Record - Work across the aisle to get things done",
"Issue - Promote expanding free trade agreements",
"Issue - Defend the rights of LGBT individuals",
"Issue - Expand government and unemployment assistance for those in need",
"Issue - Provide a path to citizenship for undocumented immigrants",
"Issue - Protect a woman's right to choose",
"Issue - Raise taxes on those making more than $250,000 a year",
"Issue - Regulate CO2 emissions to combat global warming",
"Issue - Reform policing and stop racial profiling",
"Issue - Reduce the size of military and number of military bases",
"Issue - Strengthen gun control through commonsense restrictions",
"Issue - Protect jobs and industry from unfair foreign trade",
"Issue - Prevent and prosecute abuse of government assistance programs",
"Issue - Toughen sentences and penalties for criminals",
"Issue - Expand domestic oil and gas production through drilling",
"Issue - Cut taxes on income and capital gains for all",
"Issue - Strengthen border security to stop illegal immigration",
"Issue - Strengthen our military and national defense",
"Issue - Protect gun owners' rights to defend themselves and others",
"Issue - Protect the lives of the unborn",
"Issue - Defend traditional marriage and religious beliefs"#,
#"Overall Ideology Score"
)

full_matrix_clean=full_matrix_clean[!grepl(rownames(full_matrix_clean),pattern='ee_'),]

core_for_ggplot <- data.frame(labels=labels,full_matrix_clean)
core_for_ggplot$iv_order <- factor(core_for_ggplot$labels, as.character(core_for_ggplot$labels))

#core_for_ggplot=lableOrder(xmat=core_for_ggplot,labels,label.groups,omits,o.column)


###########################################
###Then, plot those models

#Doing the within-factor sorting in shitty hack
#core_for_ggplot <- read.csv("csv/choice_primary_core_for_ggplot.csv")
#o.order <- read.csv("csv/core_for_ggplot_global.csv")$iv_order

#core_for_ggplot=reOrder(x=core_for_ggplot,o=o.order)

attach(core_for_ggplot)
labels=core_for_ggplot$labels

ggplot_stacked <- data.frame(
labels=c(as.character(labels), as.character(labels), as.character(labels)),
PID=c(rep("Democrat",length(labels)),rep("Independent",length(labels)),rep("Republican",length(labels))),
estimate=c(d_estimate,i_estimate,r_estimate),
se=c(d_se,i_se,r_se))

#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

ggplot_stacked=ggplot_stacked[!grepl(ggplot_stacked$PID,pattern='Independent'),]
#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

detach(core_for_ggplot)


#ggplot_stacked=ggplot_stacked[-c(grep(ggplot_stacked[,1],pattern='Overall')),]
#labels=labels[-c(grep(labels,pattern='Overall'))]
#write.csv(ggplot_stacked,"csv/choice_primary_ggplot_stacked.csv")

attach(ggplot_stacked)

pd <- position_dodge(.5)
minci <- (ggplot_stacked$estimate - (1.96*ggplot_stacked$se))
maxci <- (ggplot_stacked$estimate + (1.96*ggplot_stacked$se))

ggplot_stacked$PID=gsub(ggplot_stacked$PID,pattern='Democrat',replace=paste(sep='','Democrat (N=',length(unique(dems$respondent)),')'))
ggplot_stacked$PID=gsub(ggplot_stacked$PID,pattern='Republican',replace=paste(sep='','Republican (N=',length(unique(reps$respondent)),')'))


pdf("main/figures/choice_primary_twoparty_affinity_separated_dd.pdf", width=9, height=7, pointsize=12)
ggplot(ggplot_stacked,
    aes(x=labels, y=estimate, colour=PID)) +
    scale_color_manual(values = c("blue4","firebrick1")) +
    scale_x_discrete(limits=rev(labels)) +
   facet_wrap(~PID,ncol=2) +
    geom_hline(yintercept=0,size=0.5,color="gray80") +
    geom_errorbar(width=.1, aes(ymin=minci, ymax=maxci),position=pd) +
    geom_point(shape=21, size=2,position=pd) +
    ylim(-0.31,0.31) +
    theme_bw() +
    theme(axis.text.y = element_text(hjust=0,color="gray40")) +
    theme(legend.position=c(-0.3,0.935),legend.title=element_text()) +
 	theme(legend.text = element_text(size = 10)) +
  #  geom_vline(xintercept=1.5,size=0.5,linetype="dashed") +
    geom_vline(xintercept=20.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=32.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=36.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=39.5,size=0.5,linetype="dashed") +
    coord_flip() +
    labs(x = "Experimentally Manipulated Variable", y = "Change Pr(Vote Choice)")
dev.off()


#system("convert -density 600 main/figures/choice_primary_twoparty_affinity_separated_ee.pdf -quality 300 main/figures/choice_primary_twoparty_affinity_separated_ee.png")

write.csv(
	list('n.rep'=nrow(reps),
 		'n.rep.cluster'=length(unique(reps$respondent)),
 		'n.dem'=nrow(dems),
 		'n.dem.cluster'=length(unique(dems$respondent))
	),
	file='main/figures/choice_primary_twoparty_affinity_separated_dd.csv'
)

#END
